package com.jason.common.file.word.resolver;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.text.StrPool;
import cn.hutool.core.util.StrUtil;
import com.jason.common.core.exception.BizException;
import com.jason.common.file.word.dto.WordPictureInfo;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;

/**
 * Streaming (StAX) resolver for the XML parts of a Word document.
 * <p>
 * {@link #parseMapping(String)} reads a relationship file and returns the id-to-target entries whose
 * target contains {@code "media"}. {@link #parseContent(String)} reads a document body and returns
 * its text with a few inline formats rendered as HTML tags and every picture replaced by a
 * {@code ${embedId-pictureId}} placeholder; the picture sizes found on the way are collected in
 * {@link #getWordPicInfoMap()}.
 * <p>
 * Instances are not thread-safe: the picture map is a plain {@link HashMap} that is filled during
 * parsing and is never cleared, so it accumulates across {@code parseContent} calls.
 *
 * @author guozhongcheng
 * @since 2023/10/8
 **/
public class WordXmlResolver {

    /**
     * Divisor used to convert a drawing extent into pixels (9525 EMU per pixel, i.e. 914400 EMU per
     * inch at 96 DPI).
     * Reference blog: https://blog.csdn.net/renfufei/article/details/77481753
     */
    private static final BigDecimal EMU_PER_PIXEL = new BigDecimal("9525");

    private final XMLInputFactory factory = newHardenedFactory();

    /**
     * Picture information found in the Word document, keyed by the placeholder key
     * ({@code embedId-pictureId}).
     */
    private final Map<String, WordPictureInfo> wordPicInfoMap = new HashMap<>(16);

    /**
     * Returns the picture information collected by {@link #parseContent(String)}.
     *
     * @return the live internal map (not a copy), keyed by placeholder key
     */
    public Map<String, WordPictureInfo> getWordPicInfoMap() {
        return this.wordPicInfoMap;
    }

    /**
     * Parses a relationship XML file into an id-to-target mapping.
     * <p>
     * Every {@code Relationship} element contributes one entry built from its {@code Id} and
     * {@code Target} attributes (attribute names are matched case-insensitively; a missing attribute
     * is treated as an empty string). Only entries whose target contains {@code "media"} are returned.
     *
     * @param mappingXmlFilePath path of the relationship XML file
     * @return mapping from relationship id to target, restricted to media targets
     * @throws Exception a {@link BizException} wrapping any failure while opening, reading or closing
     *                   the file
     */
    public Map<String, String> parseMapping(String mappingXmlFilePath) throws Exception {
        Map<String, String> relationships = new HashMap<>(16);
        try (XmlSource source = open(mappingXmlFilePath)) {
            XMLStreamReader xml = source.events;
            while (xml.hasNext()) {
                if (xml.next() != XMLStreamReader.START_ELEMENT
                        || !"Relationship".equals(xml.getName().getLocalPart())) {
                    continue;
                }
                String id = "";
                String target = "";
                int attributeCount = xml.getAttributeCount();
                for (int i = 0; i < attributeCount; i++) {
                    String attributeName = xml.getAttributeName(i).getLocalPart();
                    if ("Id".equalsIgnoreCase(attributeName)) {
                        id = xml.getAttributeValue(i);
                    }
                    if ("Target".equalsIgnoreCase(attributeName)) {
                        target = xml.getAttributeValue(i);
                    }
                }
                relationships.put(id, target);
            }
        } catch (Exception e) {
            throw new BizException("解析Xml映射文件发生错误", e);
        }
        return MapUtil.filter(relationships, (item) -> item.getValue().contains("media"));
    }

    /**
     * Parses the XML of a Word document body into a text string.
     * <p>
     * Output rules:
     * <ul>
     * <li>character data is appended verbatim (it is not HTML-escaped);</li>
     * <li>the end of every {@code p} element appends a line feed;</li>
     * <li>{@code vertAlign} (superscript/subscript), {@code u} (single/double) and {@code em} (dot)
     * open an HTML tag that is closed at the end of the next {@code r} element; tags are closed in
     * the reverse order of opening so the markup stays well nested;</li>
     * <li>every {@code blip} with an {@code embed} attribute appends a
     * {@code ${embedId-pictureId}} placeholder.</li>
     * </ul>
     * Picture handling in detail. Example input:
     * <pre>{@code
     * <w:drawing>
     * <wp:inline distT="0" distB="0" distL="114300" distR="114300">
     * <wp:extent cx="7099300" cy="1861820"/>
     * <wp:effectExtent l="0" t="0" r="6350" b="5080"/>
     * <wp:docPr id="2" name="Picture 2"/>
     * <wp:cNvGraphicFramePr>
     * <a:graphicFrameLocks
     * xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/>
     * </wp:cNvGraphicFramePr>
     * <a:graphic
     * xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
     * <a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
     * <pic:pic
     * xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
     * <pic:nvPicPr>
     * <pic:cNvPr id="2" name="Picture 2"/>
     * <pic:cNvPicPr>
     * <a:picLocks noChangeAspect="1"/>
     * </pic:cNvPicPr>
     * </pic:nvPicPr>
     * <pic:blipFill>
     * <a:blip r:embed="rId4"/>
     * <a:stretch>
     * <a:fillRect/>
     * </a:stretch>
     * </pic:blipFill>
     * <pic:spPr>
     * <a:xfrm>
     * <a:off x="0" y="0"/>
     * <a:ext cx="7099300" cy="1861820"/>
     * </a:xfrm>
     * <a:prstGeom prst="rect">
     * <a:avLst/>
     * </a:prstGeom>
     * <a:noFill/>
     * <a:ln>
     * <a:noFill/>
     * </a:ln>
     * </pic:spPr>
     * </pic:pic>
     * </a:graphicData>
     * </a:graphic>
     * </wp:inline>
     * </w:drawing>
     * }</pre>
     * Values extracted from it:
     * <ul>
     * <li>picture sequence number: {@code <pic:cNvPr id="2" name="Picture 2"/>}</li>
     * <li>picture relationship id: {@code <a:blip r:embed="rId4"/>}</li>
     * <li>picture size inside the document: {@code <a:ext cx="7099300" cy="1861820"/>}</li>
     * </ul>
     *
     * @param xmlFilePath path of the document XML file
     * @return the extracted text
     * @throws Exception a {@link BizException} wrapping any failure while opening, reading or closing
     *                   the file (including non-numeric picture ids or extents)
     */
    public String parseContent(String xmlFilePath) throws Exception {
        StringBuilder html = new StringBuilder();
        try (XmlSource source = open(xmlFilePath)) {
            XMLStreamReader xml = source.events;
            // Closing tags of the inline formats that are currently open, innermost on top.
            Deque<String> pendingClosers = new ArrayDeque<>();
            String key = "";
            Integer id = null;
            WordPictureInfo pictureInfo = null;
            while (xml.hasNext()) {
                int type = xml.next();
                if (type == XMLStreamReader.START_ELEMENT) {
                    switch (xml.getName().getLocalPart()) {
                        case "vertAlign": {
                            String align = attributeValue(xml, "val");
                            if ("superscript".equals(align)) {
                                openTag(html, pendingClosers, "<sup>", "</sup>");
                            } else if ("subscript".equals(align)) {
                                openTag(html, pendingClosers, "<sub>", "</sub>");
                            }
                            break;
                        }
                        case "u": {
                            String underline = attributeValue(xml, "val");
                            if ("single".equals(underline)) {
                                openTag(html, pendingClosers,
                                        "<span style=\"border-bottom:1px solid black;\">", "</span>");
                            } else if ("double".equals(underline)) {
                                openTag(html, pendingClosers,
                                        "<span style=\"border-bottom:3px double black;\">", "</span>");
                            }
                            break;
                        }
                        case "em": {
                            // Emphasis mark (dot under the text).
                            if ("dot".equals(attributeValue(xml, "val"))) {
                                openTag(html, pendingClosers,
                                        "<span style=\"text-emphasis-style: dot; "
                                                + "text-emphasis-position: under left; \">",
                                        "</span>");
                            }
                            break;
                        }
                        case "cNvPr": {
                            // Sequence number of the picture; becomes the suffix of the placeholder key.
                            String sequence = attributeValue(xml, "id");
                            if (sequence != null) {
                                id = Integer.valueOf(sequence);
                            }
                            break;
                        }
                        case "blip": {
                            // Relationship id of the picture: replace the picture with a placeholder.
                            String embed = attributeValue(xml, "embed");
                            if (embed != null) {
                                key = embed + "-" + id;
                                html.append("${").append(key).append("}");
                            }
                            registerPicture(key, pictureInfo);
                            break;
                        }
                        case "ext": {
                            // Size of the picture inside the document, converted to pixels.
                            String cx = attributeValue(xml, "cx");
                            if (cx != null) {
                                if (pictureInfo == null) {
                                    pictureInfo = new WordPictureInfo();
                                }
                                pictureInfo.setWidth(emuToPixels(cx));
                            }
                            String cy = attributeValue(xml, "cy");
                            if (cy != null) {
                                if (pictureInfo == null) {
                                    pictureInfo = new WordPictureInfo();
                                }
                                pictureInfo.setHeight(emuToPixels(cy));
                            }
                            registerPicture(key, pictureInfo);
                            break;
                        }
                        default:
                            break;
                    }
                } else if (type == XMLStreamReader.END_ELEMENT) {
                    switch (xml.getName().getLocalPart()) {
                        case "p":
                            // End of a paragraph: separate paragraphs with a line feed.
                            html.append("\n");
                            break;
                        case "pic":
                            // End of a picture: forget its state so it cannot leak into the next one.
                            key = "";
                            pictureInfo = null;
                            id = null;
                            break;
                        case "r":
                            // End of a run: close every format opened so far, innermost first.
                            while (!pendingClosers.isEmpty()) {
                                html.append(pendingClosers.pop());
                            }
                            break;
                        default:
                            break;
                    }
                } else if (type == XMLStreamReader.CHARACTERS) {
                    html.append(xml.getText());
                }
            }
        } catch (Exception e) {
            throw new BizException("根据Xml文件解析Word文本信息发生错误", e);
        }
        return html.toString();
    }

    /**
     * Creates the StAX factory with DTD processing and external entity resolution switched off, so a
     * crafted document cannot make the parser read local files or remote URLs (XXE).
     *
     * @return the hardened factory
     */
    private static XMLInputFactory newHardenedFactory() {
        XMLInputFactory hardened = XMLInputFactory.newInstance();
        hardened.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        hardened.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return hardened;
    }

    /**
     * Opens the XML file and wraps it in a reader that only reports start-element, end-element and
     * character events.
     *
     * @param xmlFilePath path of the XML file
     * @return the opened source; the caller must close it
     * @throws XMLStreamException if the readers cannot be created
     * @throws IOException        declared for callers that treat open and close uniformly
     */
    private XmlSource open(String xmlFilePath) throws XMLStreamException, IOException {
        InputStream stream = FileUtil.getInputStream(xmlFilePath);
        try {
            XMLStreamReader raw = factory.createXMLStreamReader(stream);
            XMLStreamReader events = factory.createFilteredReader(raw, candidate ->
                    candidate.isStartElement() || candidate.isEndElement() || candidate.isCharacters());
            return new XmlSource(stream, raw, events);
        } catch (XMLStreamException | RuntimeException e) {
            // The stream is already open at this point; do not leak it when reader creation fails.
            try {
                stream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Returns the value of the attribute of the current start element that has the given local name.
     * If several attributes share that local name (different namespaces) the last one wins.
     *
     * @param xml       reader positioned on a start element
     * @param localName local name of the attribute
     * @return the attribute value, or {@code null} if there is no such attribute
     */
    private static String attributeValue(XMLStreamReader xml, String localName) {
        String value = null;
        int attributeCount = xml.getAttributeCount();
        for (int i = 0; i < attributeCount; i++) {
            if (localName.equals(xml.getAttributeName(i).getLocalPart())) {
                value = xml.getAttributeValue(i);
            }
        }
        return value;
    }

    /**
     * Appends an opening tag and remembers its closing tag until the end of the run.
     */
    private static void openTag(StringBuilder out, Deque<String> pendingClosers, String opening, String closing) {
        out.append(opening);
        pendingClosers.push(closing);
    }

    /**
     * Converts an extent attribute value to pixels (scale 2, half-down, then truncated to an int).
     *
     * @param emu extent as a decimal integer string
     * @return the size in pixels
     * @throws NumberFormatException if {@code emu} is not a valid long
     */
    private static int emuToPixels(String emu) {
        return new BigDecimal(Long.parseLong(emu)).divide(EMU_PER_PIXEL, 2, RoundingMode.HALF_DOWN).intValue();
    }

    /**
     * Stores the picture information under its placeholder key once both the key and a non-zero
     * width and height are known; otherwise does nothing.
     */
    private void registerPicture(String key, WordPictureInfo pictureInfo) {
        if (StrUtil.isNotBlank(key) && pictureInfo != null) {
            if (pictureInfo.getHeight() != 0 && pictureInfo.getWidth() != 0) {
                pictureInfo.setId(key);
                pictureInfo.setDistinctId(key.split(StrPool.DASHED)[0]);
                wordPicInfoMap.put(key, pictureInfo);
            }
        }
    }

    /**
     * One opened XML file: the file stream, the raw reader and the filtered reader on top of it.
     * Closing releases all three; the stream is closed explicitly because
     * {@link XMLStreamReader#close()} does not close the underlying input source.
     */
    private static final class XmlSource implements AutoCloseable {

        private final InputStream stream;
        private final XMLStreamReader raw;
        private final XMLStreamReader events;

        private XmlSource(InputStream stream, XMLStreamReader raw, XMLStreamReader events) {
            this.stream = stream;
            this.raw = raw;
            this.events = events;
        }

        @Override
        public void close() throws XMLStreamException, IOException {
            // Nested finally blocks: every resource is released even if an earlier close fails.
            try {
                events.close();
            } finally {
                try {
                    raw.close();
                } finally {
                    stream.close();
                }
            }
        }
    }

}
